## This conducts the analysis of Idaho employees discussed in the appendix (3.3).
## The code in "2-Employees Main.R" needs one of the outputs from this file,
## so run this file first when replicating all the main tables.

#load some packages
library(tidyverse)
library(data.table)
library(feather)
library(sandwich)
library(lmtest)
library(ggplot2)
library(stargazer)
library(plm)
library(xtable)


# Load the merged Idaho employee file as a data.table.
df <- data.table(read_feather("Data/IDmergedfile.feather"))

# `everyone` starts out as the same data.table object as `df` (assignment does
# not copy a data.table), so the first `:=` below also adds `minsalary` to `df`.
everyone <- df

# Drop anyone whose lowest 2013/2014 salary is below the 7.25 minimum wage.
# When both salaries are NA, min(..., na.rm = TRUE) returns Inf (with a
# warning), so those rows are NOT dropped by this filter.
everyone[,
  minsalary := min(salary.2013, salary.2014, na.rm = TRUE),
  by = list(employeekey)
]
dim(everyone) # size before the filter
everyone <- everyone[!(minsalary < 7.25), ]
dim(everyone) # size after the filter
# Highest of the two salaries per employeekey (-Inf when both are NA).
everyone[,
  MaxSalary := max(salary.2013, salary.2014, na.rm = TRUE),
  by = list(employeekey)
]

## Add the UnderMin variable.
# FALSE for everyone with a salary recorded in 2013 or 2014 (i.e. everyone
# working in this period) ...
everyone[!is.na(salary.2014) | !is.na(salary.2013), UnderMin := FALSE]
# ... then TRUE where either year's salary is below 8.00. Rows with no salary
# in either year are never assigned (NA for a newly created column).
everyone[
  (!is.na(salary.2013) | !is.na(salary.2014)) &
    (salary.2013 < 8.00 | salary.2014 < 8.00),
  UnderMin := TRUE
]

# Store a copy in case UnderMin is overwritten but wanted later.
everyone[, underminmain := UnderMin]
summary(everyone$UnderMin)
summary(everyone[(everyone$MaxSalary < 15), ]$UnderMin)

# Also set up a variable for being in the bottom income quintile (like the NYC
# workers). quintiles[2] is the 20th percentile of MaxSalary.
quintiles <- quantile(everyone$MaxSalary, probs = seq(0, 1, .2))
everyone[, underquint := 0]
# underquint is a numeric 0/1 column, so assign 1 directly instead of relying
# on TRUE being coerced to 1; the stored values are unchanged.
everyone[
  (!is.na(salary.2013) | !is.na(salary.2014)) &
    (salary.2013 < quintiles[2] | salary.2014 < quintiles[2]),
  underquint := 1
]

# Fit the 2016 vs 2012 panel model for a given dataset.
#
# `df` must supply the columns employeekey, UnderMin, Voted16 and Voted12.
# Each row of `df` is stacked twice (2016 rows first, then 2012 rows) and a
# pooled plm regression of voted on underMin * factor(year) is returned.
fit_mod_16 <- function(df) {
  n_rows <- nrow(df)

  # The data frame keeps the name `long` because plm records it in the call.
  long <- data.frame(
    employee = rep(df$employeekey, times = 2),
    underMin = rep(df$UnderMin, times = 2),
    voted = c(df$Voted16, df$Voted12),
    year = rep(c(16, 12), each = n_rows)
  )

  # Alternative specification kept for reference:
  #   felm(voted ~ underMin * factor(year) | 0 | 0 | employee, data = long)
  plm(
    voted ~ underMin * factor(year),
    data = long,
    index = c("employee", "year"),
    model = "pooling"
  )
}

## Everyone: 2016 vs 2012 models on the full sample and on MaxSalary caps.
## Each *_se vector holds HC0 standard errors from plm::vcovHC clustered on
## "group", taken as the square root of the covariance diagonal.
everyone_all <- fit_mod_16(everyone)
everyone_all_se <- sqrt(diag(
  plm::vcovHC(everyone_all, type = "HC0", cluster = "group")
))

# MaxSalary below 20
everyone_under20 <- fit_mod_16(everyone[MaxSalary < 20])
everyone_under20_se <- sqrt(diag(
  plm::vcovHC(everyone_under20, type = "HC0", cluster = "group")
))

# MaxSalary below 15
everyone_under15 <- fit_mod_16(everyone[MaxSalary < 15])
everyone_under15_se <- sqrt(diag(
  plm::vcovHC(everyone_under15, type = "HC0", cluster = "group")
))

# MaxSalary below 12
everyone_under12 <- fit_mod_16(everyone[MaxSalary < 12])
everyone_under12_se <- sqrt(diag(
  plm::vcovHC(everyone_under12, type = "HC0", cluster = "group")
))

# The 2014 models read the treatment flag from UnderMin14; it is the same
# indicator as UnderMin.
everyone[, UnderMin14 := UnderMin]

# Fit the 2014 vs 2010 panel model for a given dataset.
#
# `df` must supply the columns employeekey, UnderMin14, Voted14 and Voted10.
# Each row of `df` is stacked twice (2014 rows first, then 2010 rows) and a
# pooled plm regression of voted on underMin * factor(year) is returned.
fit_mod_14 <- function(df) {
  n_rows <- nrow(df)

  # The data frame keeps the name `long` because plm records it in the call.
  long <- data.frame(
    employee = rep(df$employeekey, times = 2),
    underMin = rep(df$UnderMin14, times = 2),
    voted = c(df$Voted14, df$Voted10),
    year = rep(c(14, 10), each = n_rows)
  )

  plm(
    voted ~ underMin * factor(year),
    data = long,
    index = c("employee", "year"),
    model = "pooling"
  )
}

## Everyone: 2014 vs 2010 models on the full sample and on MaxSalary caps,
## with HC0 standard errors from plm::vcovHC clustered on "group".
everyone14_all <- fit_mod_14(everyone)
everyone14_all_se <- sqrt(diag(
  plm::vcovHC(everyone14_all, type = "HC0", cluster = "group")
))

# MaxSalary below 15
everyone14_under15 <- fit_mod_14(everyone[MaxSalary < 15])
everyone14_under15_se <- sqrt(diag(
  plm::vcovHC(everyone14_under15, type = "HC0", cluster = "group")
))

# MaxSalary below 12
everyone14_under12 <- fit_mod_14(everyone[MaxSalary < 12])
everyone14_under12_se <- sqrt(diag(
  plm::vcovHC(everyone14_under12, type = "HC0", cluster = "group")
))

# Print the three 2014 models with the clustered standard errors.
stargazer(
  list(everyone14_all, everyone14_under15, everyone14_under12),
  se = list(everyone14_all_se, everyone14_under15_se, everyone14_under12_se),
  star.cutoffs = 0.05
)

# Save the prepared Idaho data under the name everyoneID; the main file reads
# this in for the triple-difference analysis.
everyoneID <- everyone
save(everyoneID, file = "Intermediate Files/IDfortriplediffs.RData")

# Combined table: the 2016 and 2014 models, each on the full sample and on the
# MaxSalary < 15 subsample, written to a .tex file.
stargazer(
  everyone_all, everyone_under15, everyone14_all, everyone14_under15,
  se = list(
    everyone_all_se, everyone_under15_se,
    everyone14_all_se, everyone14_under15_se
  ),
  star.cutoffs = 0.05,
  covariate.labels = c(
    "Under New Min. Wage",
    "2016",
    "Under New MW * 2016",
    "2014",
    "Under New MW * 2014"
  ),
  dep.var.labels = c("Voted"),
  add.lines = list(c(
    "Included Employees",
    "All Hourly", "Hourly Under \\$15",
    "All Hourly", "Hourly Under \\$15"
  )),
  notes = "$^{*}$p$<$0.05",
  notes.append = FALSE,
  omit.stat = "f",
  title = "Placebo Individual Difference-in-Differences Estimates, Idaho State Workers",
  out = "Tables/IDplacebo_mainests_bothyrs.tex",
  label = "IDplacebo_mainests"
)

